fprintf('\n=================================================================')
fprintf('\nDESCRIPTION: VALUE_AKM.m estimates worker fixed effects, employer fixed')
fprintf('\n             effects, time fixed effects, returns to demographics, and')
fprintf('\n             additional controls based on Abowd, Kramarz, and Margolis')
fprintf('\n             (Econometrica, 1999) using the algorithm by Card,')
fprintf('\n             Heining, and Kline (QJE, 2013).')
fprintf('\n')
fprintf('\nINPUTS:      Two files in .csv format prepared in Stata:')
fprintf('\n             - a matrix "connected_dir" with two (2) columns or variables')
fprintf('\n               ordered as: employer ID, lagged employer ID.')
fprintf('\n             - a matrix "input_dir" with seven (7) columns or variables')
fprintf('\n               ordered as: income, person ID, employer ID, year,')
fprintf('\n               education, age, lagged employer ID.')
fprintf('\n')
fprintf('\nOUTPUTS:     File containing worker fixed effects, employer fixed')
fprintf('\n             effects, time fixed effects, and returns to')
fprintf('\n             demographics.')
fprintf('\n')
fprintf('\nNOTES:       - Due to changes in ichol() command, requires')
fprintf('\n             MATLAB R2011a or later.')
fprintf('\n             - Requires MATLAB R2016b or later for implicit expansion.')
fprintf('\n             - Memory required: app. 40GB.')
fprintf('\n')
fprintf('\n=================================================================')
fprintf('\n')


fprintf('\n==============================================================')
fprintf('\n OPENING HOUSEKEEPING')
fprintf('\n==============================================================')
fprintf('\n  --> start timer\n')
tic

fprintf('\n  --> clear memory\n')
clear

fprintf('\n  --> verify MATLAB version\n')
v = version;
v_old = (str2double(v(1:3)) < 9.1); % If MATLAB is older than version R2016b
if v_old
    fprintf('\nUSER WARNING: Implicit expansions require MATLAB R2016b or later version. Run slower routine instead.\n')
end
clear v


fprintf('\n==============================================================')
fprintf('\n SET DIRECTORIES AND PARAMETERS')
fprintf('\n==============================================================')
fprintf('\n  --> set paths and macros not passed from Stata\n')
try_path_4 = 'E:/project_CBFW/int_files'; % same as $files in 0.MASTER.do
if exist(try_path_4, 'dir')
    DIR_INPUT = try_path_4;
    DIR_OUTPUT = try_path_4;
    DIR_LOG = 'E:/project_CBFW/log_files'; % same as $logs in 0.MASTER.do
else
    fprintf('\nUSER ERROR: Directory not found.\n')
    exit
end

fprintf('\n  --> read parameters\n')
params_dir = [DIR_INPUT '/parameters_akm.csv']; % Path to file containing AKM parameters.
file_params = fopen(params_dir);
data_params = fscanf(file_params, '%f %f %f %f %f %f %f %f %f %f %f %f %f %f', [14 inf])';
fclose(file_params);
year_min = data_params(:, 1); % Minimum year.
fprintf('\n')
disp(['year_min = ' num2str(year_min)])
year_max = data_params(:, 2); % Maximum year.
fprintf('\n')
disp(['year_max = ' num2str(year_max)])
age_poly_order = data_params(:, 3); % Age polynomial order (0 = no age controls; 1 = restricted profile of age indicators; 2 / 3 / etc. = include 2nd order term / 2nd and 3rd order terms / etc.).
fprintf('\n')
disp(['age_poly_order = ' num2str(age_poly_order)])
age_flat_min = data_params(:, 4); % Minimum age for which income-age profile is restricted to be flat (only relevant if age_poly_order == 1).
fprintf('\n')
disp(['age_flat_min = ' num2str(age_flat_min)])
age_flat_max = data_params(:, 5); % Maximum age for which income-age profile is restricted to be flat (only relevant if age_poly_order == 1).
fprintf('\n')
disp(['age_flat_max = ' num2str(age_flat_max)])
age_norm = data_params(:, 6); % Normalization age.
fprintf('\n')
disp(['age_norm = ' num2str(age_norm)])
age_min = data_params(:, 7); % Minimum age.
fprintf('\n')
disp(['age_min = ' num2str(age_min)])
age_max = data_params(:, 8); % Maximum age.
fprintf('\n')
disp(['age_max = ' num2str(age_max)])
edu_inter = data_params(:, 9); % Education interactions (0 = no; 1 = yes).
fprintf('\n')
disp(['edu_inter = ' num2str(edu_inter)])
akm_hours = data_params(:, 10); % Hours controls (0 = no; 1 = yes).
fprintf('\n')
disp(['akm_hours = ' num2str(akm_hours)])
akm_occ = data_params(:, 11); % Occupation controls (0 = no; 1 = yes).
fprintf('\n')
disp(['akm_occ = ' num2str(akm_occ)])
akm_tenure = data_params(:, 12); % Tenure controls (0 = no; 1 = yes).
fprintf('\n')
disp(['akm_tenure = ' num2str(akm_tenure)])
akm_exp_act = data_params(:, 13); % Actual experience controls (0 = no; 1 = yes).
fprintf('\n')
disp(['akm_exp_act = ' num2str(akm_exp_act)])
ext = data_params(:, 14); % File name extension string.
fprintf('\n')
disp(['ext = ' num2str(ext)])
if age_poly_order == 1 && ((age_flat_min >= age_flat_max || year_min + 1 >= year_max) || (age_flat_min >= age_max || age_flat_max <= age_min))
    fprintf('\nUSER ERROR: Requested to estimate age effects (age_poly_order = 1) but restricted-to-be-flat age region is too short or falls outside of selected age range.\n')
    exit
elseif age_poly_order >= 2 && (age_norm < age_min || age_norm > age_max)
    fprintf('\nUSER ERROR: Requested to estimate higher-order age terms (age_poly_order >= 2) but normalization age falls outside of selected age range.\n')
    exit
end
clear file_params age_min age_max data_params

fprintf('\n  --> set read and write paths\n')
input_dir = [DIR_INPUT '/tomatlab_' num2str(year_min) '_' num2str(year_max) '_' num2str(ext) '.csv']; % Path to file with Stata input to estimate AKM equation: income, person_ID, employer_ID, year (, age).
output_dir = [DIR_OUTPUT '/tostata_' num2str(year_min) '_' num2str(year_max) '_' num2str(ext) '.txt']; % Path to file where MATLAB output is exported to.
stop_dir = [DIR_OUTPUT '/stopakm_' num2str(ext) '.txt']; %Path to file to tell Stata to continue running
clear DIR_INPUT DIR_OUTPUT year_min year_max

fprintf('\n  --> start diary (log) file\n')
FILE_LOG = [DIR_LOG, '/log_akm_matlab_', num2str(ext), '.log'];
eval(['diary ''', FILE_LOG, ''''])
clear DIR_LOG FILE_LOG ext


fprintf('\n==============================================================')
fprintf('\n LOAD DATA')
fprintf('\n==============================================================')
fprintf('\n  --> read input data\n')
input_file = fopen(input_dir, 'r');
input_format = '%f %f %f %u'; % always load at least 4 variables: inc, persid, empid, year
input_n = 4;
if age_poly_order
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
if edu_inter
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
if akm_hours
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
if akm_occ
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
if akm_tenure
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
if akm_exp_act
    input_format = [input_format, ' %u'];
    input_n = input_n + 1;
end
fprintf('\n')
disp(['number of variables in input data = ' int2str(input_n) ' (format = ' input_format ')']);
input_data = fscanf(input_file, input_format, [input_n inf])';
fclose(input_file);
inc = input_data(:, 1); % Input data column 1: log income.
persid = input_data(:, 2); % Input data column 2: worker ID.
empid = input_data(:, 3); % Input data column 3: employer ID.
year = input_data(:, 4); % Input data column 4: year.
col_counter = 4;
if age_poly_order
    col_counter = col_counter + 1;
    age = input_data(:, col_counter); % Additional input data: age.
end
if edu_inter
    col_counter = col_counter + 1;
    edu = input_data(:, col_counter); % Additional input data: education.
end
if akm_hours
    col_counter = col_counter + 1;
    hours = input_data(:, col_counter); % Additional input data: hours.
end
if akm_occ
    col_counter = col_counter + 1;
    occ = input_data(:, col_counter); % Additional input data: occupation codes.
end
if akm_tenure
    col_counter = col_counter + 1;
    tenure = input_data(:, col_counter); % Additional input data: tenure length.
end
if akm_exp_act
    col_counter = col_counter + 1;
    exp_act = input_data(:, col_counter); % Additional input data: actual experience (or actual experience interacted with potential experience).
end
clear input_dir input_file input_format input_n input_data col_counter

fprintf('\n  --> summarize objects stored in memory\n')
whos

fprintf('\n  --> descriptive statistics on complete dataset (already restricted to connected set)\n')
n_worker_years = length(inc);
n_workers_unique = length(unique(persid));
n_employers_unique = length(unique(empid));
n_years_unique = length(unique(year));
fprintf('\n')
disp(['total number of worker-years = ' int2str(n_worker_years)]);
fprintf('\n')
disp(['number of unique workers = ' int2str(n_workers_unique)]);
fprintf('\n')
disp(['number of unique employers = ' int2str(n_employers_unique)]);
fprintf('\n')
disp(['number of unique years = ' int2str(n_years_unique)]);
clear n_worker_years n_workers_unique n_employers_unique n_years_unique


fprintf('\n==============================================================')
fprintf('\n ESTIMATE AKM EQUATION')
fprintf('\n==============================================================')
fprintf('\n  --> create unique indicators for workers, employers, years, ages, hours, occupations, tenure, and actual experience \n')
NT = length(inc); % Total number of worker-years.
persid_old = persid;
[~, ~, persid_unique_index] = unique(persid);
clear persid
% N = max(persid_unique_index); % Number of unique elements in persid = number of unique workers.
[~, ~, empid_unique_index] = unique(empid);
clear empid
% J = max(empid_unique_index); % Number of unique elements in empid = number of unique employers.
[~, ~, year_unique_index] = unique(year);
T = max(year_unique_index); % Number of unique elements in year = number of unique years.
if age_poly_order == 1
    age(age >= age_flat_min & age <= age_flat_max) = age_flat_min; % Restrict income-age profile to be flat between ages age_flat_min and age_flat_max.
    clear age_flat_min age_flat_max
    [~, ~, age_unique_index] = unique(age); % Note: Command -unique()- returns unique values of age. Note: Column index (3rd assignment object) maps vector of unique entries (1st assignment object) into the original vector (argument of -unique()-).
    clear age
    N_Age = max(age_unique_index); % Number of unique elements in age = number of unique ages.
end
if edu_inter
    [~, ~, edu_unique_index] = unique(edu);
    clear edu
    if age_poly_order >= 2
        N_Edu = max(edu_unique_index); % Number of unique elements in edu = number of unique education levels.
    end
end
if akm_hours
    [~, ~, hours_unique_index] = unique(hours);
    clear hours
%     N_H = max(hours_unique_index); % Number of unique elements in hours = number of unique contractual hours levels.
end
if akm_occ
    [~, ~, occ_unique_index] = unique(occ);
    clear occ
%     N_O = max(occ_unique_index); % Number of unique elements in occ = number of unique occupation codes.
end
if akm_tenure
    [~, ~, tenure_unique_index] = unique(tenure);
    clear tenure
%     N_Ten = max(tenure_unique_index); % Number of unique elements in tenure = number of unique tenure lengths.
end
if akm_exp_act
    [~, ~, exp_act_unique_index] = unique(exp_act);
    clear exp_act
%     N_ExpA = max(exp_act_unique_index); % Number of unique elements in exp_act = number of unique levels of actual experience.
end

fprintf('\n  --> create worker indicators\n')
W = sparse(1:NT, persid_unique_index', 1); % Dimension: NT x N. Note: This is the only independent variable for which no category is dropped.
W_len = size(W, 2);
clear persid_unique_index

fprintf('\n  --> create employer indicators\n')
F = sparse(1:NT, empid_unique_index', 1);
F = F(:, 2:end); % Drop indicator for first firm, or else collinear with worker effects (will normalize later). Dimension: NT x (J - 1).
F_len = size(F, 2);
clear empid_unique_index

fprintf('\n  --> create (education-specific) year indicators\n')
%education variable as categorized in original data (unless coarsened)
if edu_inter
    Y = sparse(1:NT, T*(edu_unique_index' - 1) + year_unique_index', 1);
    Y(:, T*(edu_unique_index' - 1) + 1) = []; % Drop indicator for first year of every education group, or else collinear with worker effects. Dimension: NT x (T - 1)*N_Edu.
else
    Y = sparse(1:NT, year_unique_index', 1);
    Y = Y(:, 2:end); % Drop indicator for first year, or else collinear with worker effects. Dimension: NT x (T - 1).
    % Y = Y(:, 2:end) - sparse(Y(:, end)*ones(1, size(Y, 2) - 1)); % Work in progress: use Deaton Method as an alternative to previous line: Subtract replication of last column from entire matrix, so year effects sum to zero.  Dimension: NT x (T - 1).
end
Y_len = size(Y, 2);
clear year_unique_index T

fprintf('\n  --> create age indicators or higher-order (2nd and up) terms\n')
if age_poly_order == 1
    if edu_inter
        A = sparse(1:NT, N_Age*(edu_unique_index' - 1) + age_unique_index', 1);
        A(:, N_Age*(edu_unique_index' - 1) + 1) = []; % Drop indicator for first age of every education group, or else collinear with worker effects. Dimension: NT x (N_Age - 1)*N_Edu.
    else
        A = sparse(1:NT, age_unique_index', 1);
        A = A(:, 2:end); % Drop indicator for first age, or else collinear with worker effects. Dimension: NT x (N_Age - 1).
    end
    clear age_unique_index N_Age
    A_len = size(A, 2);
elseif age_poly_order >= 2
    age = (age - age_norm)/age_norm;  % normalize to fit age wage profile
    if edu_inter
        E = sparse(1:NT, edu_unique_index', 1);
        if v_old
            A = zeros(NT, N_Edu*(age_poly_order - 1));            
            for n = 2:age_poly_order
                A(:, N_Edu*(n - 2) + 1:N_Edu*(n - 1)) = bsxfun(@times, E, age.^n);
            end
            A = sparse(A); % Dimension: NT x (N_Age - 1)*N_Edu.
        else
            if age_poly_order == 2
                A = [bsxfun(@times, E, age.^2)]; % Dimension: NT x N_Age*N_Edu.
            elseif age_poly_order == 3
                A = [bsxfun(@times, E, age.^2), bsxfun(@times, E, age.^3)]; % Dimension: NT x N_Age*N_Edu.
            else
                A = repmat(E, 1, age_poly_order - 1).*repmat(age, 1, N_Edu*(age_poly_order - 1)).^kron((2:age_poly_order), ones(1, N_Edu)); % Dimension: NT x N_Age*N_Edu.
            end
        end
        clear E age N_Edu
    else
        if v_old
            A = zeros(NT, age_poly_order - 1);
            for n = 2:age_poly_order
                A(:, n - 1) = age.^n;
            end
            A = sparse(A); % Dimension: NT x (N_Age - 1).
        else
            A = sparse(age.^(2:age_poly_order)); % Dimension: NT x (N_Age - 1).
        end
        clear age
    end
    A_len = size(A, 2);
end
clear age_norm v_old

fprintf('\n  --> create hours indicators\n')
if akm_hours
    H = sparse(1:NT, hours_unique_index', 1);
    H = H(:, 2:end); % Drop indicator for first hours level, or else collinear with worker effects. Dimension: NT x (N_H - 1).
    H_len = size(H, 2);
    clear hours_unique_index
end

fprintf('\n  --> create occupation indicators\n')
if akm_occ
    O = sparse(1:NT, occ_unique_index', 1);
    O = O(:, 2:end); % Drop indicator for first occupation code, or else collinear with worker effects. Dimension: NT x (N_O - 1).
    O_len = size(O, 2);
    clear occ_unique_index
end

fprintf('\n  --> create (education-specific) tenure indicators\n')
if akm_tenure
%     if edu_inter
%         Ten = sparse(1:NT, N_Ten*(edu_unique_index' - 1) + tenure_unique_index', 1);
%         Ten(:, N_Ten*(edu_unique_index' - 1) + 1) = []; % Drop indicator for first tenure level of every education group, or else collinear with worker effects. Dimension: NT x (N_Ten - 1)*N_Edu.
%     else
        Ten = sparse(1:NT, tenure_unique_index', 1);
        Ten = Ten(:, 2:end); % Drop indicator for first tenure level, or else collinear with worker effects. Dimension: NT x (N_Ten - 1).
%     end
    Ten_len = size(Ten, 2);
    clear tenure_unique_index
end

fprintf('\n  --> create (education-specific) actual experience indicators\n')
if akm_exp_act
%     if edu_inter
%         ExpA = sparse(1:NT, N_ExpA*(edu_unique_index' - 1) + exp_act_unique_index', 1);
%         ExpA(:, N_ExpA*(edu_unique_index' - 1) + 1) = []; % Drop indicator for first actual experience level of every education group, or else collinear with worker effects.  Dimension: NT x (N_ExpA - 1)*N_Edu.
%     else
        ExpA = sparse(1:NT, exp_act_unique_index', 1);
        ExpA = ExpA(:, 2:end); % Drop indicator for first actual experience level, or else collinear with worker effects. Dimension: NT x (N_ExpA - 1).
%     end
    ExpA_len = size(ExpA, 2);
    clear exp_act_unique_index
end

fprintf('\n  --> clear education index variable that is no longer needed\n')
if edu_inter
    clear edu_unique_index
end

fprintf('\n  --> generate design matrix (X) and Gramian matrix (X_prime*X)\n')
X = [W, F, Y];
X_len = W_len + F_len + Y_len; % = N + (J - 1) + (T - 1).
if age_poly_order >= 1
    X = [X, A];
    X_len = X_len + A_len;
end
if akm_hours
    X = [X, H];
    X_len = X_len + H_len;
end
if akm_occ
    X = [X, O];
    X_len = X_len + O_len;
end
if akm_tenure
    X = [X, Ten];
    X_len = X_len + Ten_len;
end
if akm_exp_act
    X = [X, ExpA];
    X_len = X_len + ExpA_len;
end
fprintf('\n')
disp(['dimensions of the design matrix (X) = ' num2str(NT) 'x' num2str(X_len)])
fprintf('\n')
disp(['dimensions of the Gramian matrix (X_prime*X) = ' num2str(X_len) 'x' num2str(X_len)])
fprintf('\n')
disp(['dimensions of the moment matrix (X_prime*y) = ' num2str(X_len) 'x' num2str(1)])
clear NT

try
    fprintf('\n  --> perform incomplete Cholesky factorization of Gramian matrix (X_prime*X) as preconditioner for matrix inversion\n')
    L = ichol(X'*X, struct('type', 'ict', 'droptol', 1e-2, 'diagcomp', 1e-1)); % Note: Command -ichol()- uses iterative approximation algorithm.

    fprintf('\n  --> create coefficient vector\n')
    b = pcg(X'*X, X'*inc, 1e-10, 1e3, L, L'); % Note: Command -pcg()- uses preconditioned conjugate gradients method to compute b = (X'*X)^-1*X'*inc. Dimension: N + (J - 1) + (T - 1)*N_Edu + ... if edu_inter, or else N + (J - 1) + (T - 1) + ....
    fprintf('\n')
    disp(['dimensions of the coefficient vector (b) = ' num2str(X_len) 'x' num2str(1)])
    clear L X X_len
catch
    fprintf('\n  --> if Cholesky factorization fails, use preconditioned conjugate gradients method directly\n')
    b=pcg(X'*X, X'*inc, 1e-10, 100000);
    fprintf('\n')
    disp(['dimensions of the coefficient vector (b) = ' num2str(X_len) 'x' num2str(1)])
    clear L X X_len
end

fprintf('\n  --> summarize objects stored in memory\n')
whos

fprintf('\n  --> extract estimated paramters\n')
index_start = 1; % Beginning of index for worker fixed effects.
index_end = W_len; % End of index for worker fixed effects.
pe = full(W*b(index_start:index_end)); % Worker fixed effects = N worker indicators * estimated worker returns vector. Dimension: NT x 1.
clear W W_len
index_start = index_end + 1; % Beginning of index for employer fixed effects.
index_end = index_end + F_len; % End of index for employer fixed effects.
fe = full(F*b(index_start:index_end)); % Employer fixed effects (first one dropped) = J - 1 employer indicators (first one dropped) * estimated employer returns vector. Dimension: NT x 1.
clear F F_len
index_start = index_end + 1; % Beginning of index for (education-specific) year fixed effects.
index_end = index_end + Y_len; % End of index for (education-specific) year fixed effects.
xb_y = full(Y*b(index_start:index_end)); % Year fixed effects = T - 1 year indicators (first one dropped) * estimated year returns vector. Dimension: NT x 1.
clear Y Y_len
if age_poly_order >= 1
    index_start = index_end + 1; % Beginning of index for (education-specific) age fixed effects.
    index_end = index_end + A_len; % End of index for (education-specific) age fixed effects.
    xb_a = full(A*b(index_start:index_end)); % Age fixed effects or higher-order age terms = N_Age - 1 age indicators or higher-order age terms * estimated age returns vector. Dimension: NT x 1.
    clear A A_len
end
if akm_hours
    index_start = index_end + 1; % Beginning of index for hours fixed effects.
    index_end = index_end + H_len; % End of index for hours fixed effects.
    xb_h = full(H*b(index_start:index_end)); % Hours fixed effects.
    clear H H_len
end
if akm_occ
    index_start = index_end + 1; % Beginning of index for occupation fixed effects.
    index_end = index_end + O_len; % End of index for occupation fixed effects.
    xb_o = full(O*b(index_start:index_end)); % Occupation fixed effects.
    clear O O_len
end
if akm_tenure
    index_start = index_end + 1; % Beginning of index for tenure fixed effects.
    index_end = index_end + Ten_len; % End of index for tenure fixed effects.
    xb_ten = full(Ten*b(index_start:index_end)); % Tenure fixed effects.
    clear Ten Ten_len
end
if akm_exp_act
    index_start = index_end + 1; % Beginning of index for actual experience fixed effects.
    index_end = index_end + ExpA_len; % End of index for actual experience fixed effects.
    xb_expa = full(ExpA*b(index_start:index_end)); % Actual experience fixed effects.
    clear ExpA ExpA_len
end
clear b index_start index_end


fprintf('\n==============================================================')
fprintf('\n ANALYZE ESTIMATION RESULTS')
fprintf('\n==============================================================')
fprintf('\n  --> normalize worker and employer fixed effect distributions such that worker fixed effects are mean zero\n')
m_pe = mean(pe);
pe = pe - m_pe;
fe = fe + m_pe;
clear m_pe

fprintf('\n  --> means of worker and employer fixed effects\n')
m = mean([pe, fe]);
disp(m)
clear m

fprintf('\n  --> create AKM residual\n')
resid = inc - pe - fe - xb_y;
if age_poly_order >= 1
    resid = resid - xb_a;
end
if akm_hours
    resid = resid - xb_h;
end
if akm_occ
    resid = resid - xb_o;
end
if akm_tenure
    resid = resid - xb_ten;
end
if akm_exp_act
    resid = resid - xb_expa;
end

fprintf('\n  --> full covariance matrix of estimated AKM components\n')
cov_mat = [inc, pe, fe, xb_y];
clear inc
if age_poly_order >= 1
    cov_mat = [cov_mat, xb_a];
end
if akm_hours
    cov_mat = [cov_mat, xb_h];
end
if akm_occ
    cov_mat = [cov_mat, xb_o];
end
if akm_tenure
    cov_mat = [cov_mat, xb_ten];
end
if akm_exp_act
    cov_mat = [cov_mat, xb_expa];
end
cov_mat = [cov_mat, resid];
C = full(cov(cov_mat));
clear cov_mat resid
cov_str = ' income    person    employer  year';
if edu_inter
    cov_str = strcat(cov_str, ' edu-year');
else
    cov_str = strcat(cov_str, '     year');
end
if age_poly_order >= 1
    if edu_inter
        cov_str = strcat(cov_str, '      edu-age');
    else
        cov_str = strcat(cov_str, '          age');
    end
end
if akm_hours
    cov_str = strcat(cov_str, '   hours');
end
if akm_occ
    cov_str = strcat(cov_str, '    occup');
end
if akm_tenure
%     if edu_inter
%         cov_str = strcat(cov_str, ' edu-tenure');
%     else
        cov_str = strcat(cov_str, '    tenure');
%     end
end
if akm_exp_act
%     if edu_inter
%         cov_str = strcat(cov_str, ' edu-exp_act');
%     else
        cov_str = strcat(cov_str, '    exp_act');
%     end
end
cov_str = strcat(cov_str, '       residual');
fprintf('\n')
disp(cov_str)
disp(C)
var_y = C(1, 1);
var_y_share = 100*var_y/var_y;
var_pe = C(2, 2);
var_pe_share = 100*var_pe/var_y;
var_fe = C(3, 3);
var_fe_share = 100*var_fe/var_y;
var_year = C(4, 4);
var_year_share = 100*var_year/var_y;
cov_index = 4;
cov_sum_2 = var_y - var_pe - var_fe - var_year;
if age_poly_order >= 1
    cov_index = cov_index + 1;
    var_age = C(cov_index, cov_index);
    var_age_share = 100*var_age/var_y;
    cov_sum_2 = cov_sum_2 - var_age;
end
if akm_hours
    cov_index = cov_index + 1;
    var_hours = C(cov_index, cov_index);
    var_hours_share = 100*var_hours/var_y;
    cov_sum_2 = cov_sum_2 - var_hours;
end
if akm_occ
    cov_index = cov_index + 1;
    var_occ = C(cov_index, cov_index);
    var_occ_share = 100*var_occ/var_y;
    cov_sum_2 = cov_sum_2 - var_occ;
end
if akm_tenure
    cov_index = cov_index + 1;
    var_tenure = C(cov_index, cov_index);
    var_tenure_share = 100*var_tenure/var_y;
    cov_sum_2 = cov_sum_2 - var_tenure;
end
if akm_exp_act
    cov_index = cov_index + 1;
    var_exp_act = C(cov_index, cov_index);
    var_exp_act_share = 100*var_exp_act/var_y;
    cov_sum_2 = cov_sum_2 - var_exp_act;
end
cov_index = cov_index + 1;
var_resid = C(cov_index, cov_index);
var_resid_share = 100*var_resid/var_y;
cov_sum_2 = cov_sum_2 - var_resid;
cov_sum_2_share = 100*cov_sum_2/var_y;
clear cov_str C cov_index

fprintf('\n  --> variance decomposition\n')
disp(['Var(income) = ' num2str(var_y) ' (' num2str(var_y_share) '%)'])
disp(['Var(worker FE) = ' num2str(var_pe) ' (' num2str(var_pe_share) '%)'])
disp(['Var(employer FE) = ' num2str(var_fe) ' (' num2str(var_fe_share) '%)'])
if edu_inter
    disp(['Var(edu-year FE) = ' num2str(var_year) ' (' num2str(var_year_share) '%)'])
else
    disp(['Var(year FE) = ' num2str(var_year) ' (' num2str(var_year_share) '%)'])
end
if age_poly_order >= 1
    if edu_inter
        disp(['Var(edu-age FE) = ' num2str(var_age) ' (' num2str(var_age_share) '%)'])
    else
        disp(['Var(age FE) = ' num2str(var_age) ' (' num2str(var_age_share) '%)'])
    end
    clear var_age var_age_share
end
if akm_hours
    disp(['Var(hours FE) = ' num2str(var_hours) ' (' num2str(var_hours_share) '%)'])
    clear var_hours var_hours_share
end
if akm_occ
    disp(['Var(occ FE) = ' num2str(var_occ) ' (' num2str(var_occ_share) '%)'])
    clear var_occ var_occ_share
end
if akm_tenure
%     if edu_inter
%         disp(['Var(edu-tenure FE) = ' num2str(var_tenure) ' (' num2str(var_tenure_share) '%)'])
%     else
        disp(['Var(tenure FE) = ' num2str(var_tenure) ' (' num2str(var_tenure_share) '%)'])
%     end
    clear var_tenure var_tenure_share
end
if akm_exp_act
%     if edu_inter
%         disp(['Var(edu-actual exp FE) = ' num2str(var_exp_act) ' (' num2str(var_exp_act_share) '%)'])
%     else
        disp(['Var(actual exp FE) = ' num2str(var_exp_act) ' (' num2str(var_exp_act_share) '%)'])
%     end
    clear var_exp_act var_exp_act_share
end
disp(['2*sum(Cov(.)) = ' num2str(cov_sum_2) ' (' num2str(cov_sum_2_share) '%)'])
disp(['Var(resid) = ' num2str(var_resid) ' (' num2str(var_resid_share) '%)'])
clear var_y var_y_share var_pe var_pe_share var_fe var_fe_share var_year var_year_share edu_inter cov_sum_2 cov_sum_2_share var_resid var_resid_share

fprintf('\n  --> correlation coefficient between AKM worker and employer fixed effects\n');
rho = corr(pe, fe);
disp(rho)
fprintf('\n')
clear rho

fprintf('\n  --> write estimation results to tab-delimited output file\n')
output_file = fopen(output_dir, 'w');
out_header_format = '%10s\t %2s\t %6s\t %6s\t %6s';
out_header = {'persid', 'y', 'pe', 'fe', 'xb_y'};
out_data_format = '%10.0f\t %2.0f\t %6.4f\t %6.4f\t %6.4f';
out_data = [persid_old'; year'; pe'; fe'; xb_y'];
clear output_dir persid_old year pe fe xb_y
if age_poly_order >= 1
    out_header_format = strcat(out_header_format, '\t %6s');
    out_header{end + 1} = 'xb_a';
    out_data_format = strcat(out_data_format, '\t %6.4f');
    out_data = [out_data; xb_a'];
    clear xb_a
end
if akm_hours
    out_header_format = strcat(out_header_format, '\t %6s');
    out_header{end + 1} = 'xb_h';
    out_data_format = strcat(out_data_format, '\t %6.4f');
    out_data = [out_data; xb_h'];
    clear xb_h
end
if akm_occ
    out_header_format = strcat(out_header_format, '\t %6s');
    out_header{end + 1} = 'xb_o';
    out_data_format = strcat(out_data_format, '\t %6.4f');
    out_data = [out_data; xb_o'];
    clear xb_o
end
if akm_tenure
    out_header_format = strcat(out_header_format, '\t %6s');
    out_header{end + 1} = 'xb_ten';
    out_data_format = strcat(out_data_format, '\t %6.4f');
    out_data = [out_data; xb_ten'];
    clear xb_ten
end
if akm_exp_act
    out_header_format = strcat(out_header_format, '\t %6s');
    out_header{end + 1} = 'xb_exp';
    out_data_format = strcat(out_data_format, '\t %6.4f');
    out_data = [out_data; xb_expa'];
    clear xb_expa
end
out_header_format = strcat(out_header_format, '\n');
out_data_format = strcat(out_data_format, '\n');
fprintf(output_file, out_header_format, out_header{:});
fprintf(output_file, out_data_format, out_data);
fclose(output_file);
clear out_header_format out_header out_data_format out_data output_file age_poly_order akm_hours akm_occ akm_tenure akm_exp_act


fprintf('\n==============================================================')
fprintf('\n CLOSING HOUSEKEEPING')
fprintf('\n==============================================================')

fid = fopen(stop_dir, 'w+');
fclose(fid);

fprintf('\n  --> summarize objects stored in memory\n')
whos

fprintf('\n  --> clear parameters input\n')
fclose('all');
eval(['delete ', params_dir])
clear params_dir

fprintf('\n  --> clear memory\n')
clear

fprintf('\n  --> end timer\n')
toc

fprintf('\n  --> close diary (log) file\n')
fprintf('\n\n\n\n\n\n\n\n\n\n\n\n')
diary close

fprintf('\n  --> exit\n')
exit